Microbial counts of source material and final cultures

# Uploading the files and creating subsets
fp <- file.path(getwd(), "..", "data", "cultures", "microbes", "viable_counts.xlsx")
df <- read.xlsx(fp, sheet = 1) # import in R the dataset (excel file) as dataframe

# let's transform the first variable in factor (useful for data exploration by groups)
df <- df %>% mutate_at(vars(sample, replicate),
                       as.factor)
# let's remove the repl. 1 because considered outlier from NGS analysis
df <- subset(df, replicate != "1")
df <- df[-2] # let's remove the "replicate" column

We analyzed 12 samples:

  1. RM = raw milk

  2. RM.H = raw milk heat treated (54°C for 54min)

  3. eRM = enriched raw milk (spontaneous fermentation at 10°C for 21d)

  4. eRM.H = enriched heated raw milk

  5. eRM.HS = enriched heated, salted raw milk (5% v/w NaCl)

  6. NWC = natural whey culture (thermophilic)

  7. eRWC.y = enriched raw milk whey culture, young. Mix NWC-eRM (1:10), incubated at 38°C for 6h

  8. eRWC.o = old, incubated at 38°C for 22h

  9. eRWC.H.y = mix NWC-eRM.H (1:10), young

  10. eRWC.H.o = mix NWC-eRM.H (1:10), old

  11. eRWC.HS.y = mix NWC-eRM.HS (1:10), young

  12. eRWC.HS.o = mix NWC-eRM.HS (1:10), old

Here is a summary statistic of the results divided for each sample:

dstats <- function(x)sapply(x, mystats)
dfstats <- by(df[2:9],
              list(sample = df$sample), dstats)
dfstats
## sample: eRM
##       Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n                   3.0          3.0          3.0             3.0         3.0
## mean                9.2          9.1          9.1             3.6         6.8
## stdev               0.0          0.2          0.2             0.6         1.3
## max                 9.2          9.3          9.3             4.0         8.0
## min                 9.2          8.9          8.9             2.9         5.4
##       Yeasts.and.molds Staphylococci Enterobacteriaceae
## n                  3.0           3.0                3.0
## mean               4.6           6.8                6.5
## stdev              0.4           0.5                0.8
## max                5.1           7.3                7.4
## min                4.3           6.3                5.8
## ------------------------------------------------------------ 
## sample: eRM.H
##       Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n                   3.0          3.0          3.0             3.0         3.0
## mean                7.9          7.7          7.6             3.1         7.0
## stdev               0.4          0.6          1.2             0.7         0.7
## max                 8.3          8.1          8.9             3.8         7.7
## min                 7.5          7.0          6.5             2.6         6.4
##       Yeasts.and.molds Staphylococci Enterobacteriaceae
## n                  3.0           3.0                3.0
## mean               3.8           6.6                6.8
## stdev              1.4           1.0                1.0
## max                5.4           7.7                7.4
## min                2.8           5.8                5.7
## ------------------------------------------------------------ 
## sample: eRM.HS
##       Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n                   3.0          3.0          3.0             3.0         3.0
## mean                6.6          6.1          4.5             0.4         4.7
## stdev               0.4          1.2          0.8             0.6         0.7
## max                 6.8          7.0          5.4             1.1         5.4
## min                 6.2          4.8          4.1             0.0         4.3
##       Yeasts.and.molds Staphylococci Enterobacteriaceae
## n                  3.0           3.0                3.0
## mean               1.3           6.0                3.7
## stdev              1.5           1.1                0.4
## max                2.9           6.7                4.2
## min                0.0           4.7                3.4
## ------------------------------------------------------------ 
## sample: eRWC.H.o
##       Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n                   3.0          3.0          3.0             3.0         3.0
## mean                7.2          7.2          7.1             1.8         6.1
## stdev               0.7          0.8          0.8             0.7         0.3
## max                 7.9          7.9          7.8             2.3         6.4
## min                 6.5          6.4          6.3             1.0         5.8
##       Yeasts.and.molds Staphylococci Enterobacteriaceae
## n                  3.0           3.0                3.0
## mean               5.7           3.3                3.6
## stdev              0.3           1.9                2.3
## max                5.9           5.4                5.0
## min                5.4           1.5                1.0
## ------------------------------------------------------------ 
## sample: eRWC.H.y
##       Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n                   3.0          3.0          3.0             3.0         3.0
## mean                8.3          7.7          7.8             2.3         5.9
## stdev               0.8          1.1          0.9             1.2         0.9
## max                 8.8          8.9          8.9             3.0         6.7
## min                 7.4          6.8          7.3             0.9         4.8
##       Yeasts.and.molds Staphylococci Enterobacteriaceae
## n                  3.0           3.0                3.0
## mean               4.4           5.5                4.9
## stdev              1.1           1.2                1.2
## max                5.2           6.6                5.7
## min                3.2           4.3                3.5
## ------------------------------------------------------------ 
## sample: eRWC.HS.o
##       Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n                   3.0          3.0          3.0             3.0         3.0
## mean                7.2          7.0          7.2             1.1         4.1
## stdev               0.7          0.5          1.1             1.2         1.9
## max                 7.9          7.6          8.0             2.3         5.4
## min                 6.5          6.7          6.0             0.0         2.0
##       Yeasts.and.molds Staphylococci Enterobacteriaceae
## n                  3.0           3.0                3.0
## mean               4.3           1.5                2.1
## stdev              2.1           1.1                1.9
## max                6.0           2.7                4.3
## min                2.0           0.5                1.0
## ------------------------------------------------------------ 
## sample: eRWC.HS.y
##       Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n                   3.0          3.0          3.0             3.0         3.0
## mean                8.4          7.7          7.1             1.2         3.3
## stdev               1.0          1.4          1.8             1.6         0.3
## max                 9.3          9.3          9.2             3.0         3.7
## min                 7.3          6.8          5.9             0.0         3.0
##       Yeasts.and.molds Staphylococci Enterobacteriaceae
## n                  3.0           3.0                3.0
## mean               2.6           3.0                1.6
## stdev              2.3           0.4                1.4
## max                4.6           3.4                2.4
## min                0.0           2.5                0.0
## ------------------------------------------------------------ 
## sample: eRWC.o
##       Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n                   3.0          3.0          3.0             3.0         3.0
## mean                7.3          6.5          7.4             1.7         5.5
## stdev               1.0          0.2          1.2             0.5         0.8
## max                 8.4          6.8          8.3             2.2         6.2
## min                 6.5          6.3          6.0             1.1         4.6
##       Yeasts.and.molds Staphylococci Enterobacteriaceae
## n                  3.0           3.0                3.0
## mean               5.7           3.2                4.8
## stdev              1.2           1.3                1.8
## max                6.5           4.7                6.6
## min                4.3           2.3                3.0
## ------------------------------------------------------------ 
## sample: eRWC.y
##       Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n                   3.0          3.0          3.0             3.0         3.0
## mean                8.0          7.5          7.9             2.1         5.8
## stdev               0.7          0.7          0.2             0.4         1.9
## max                 8.6          8.0          8.1             2.4         7.7
## min                 7.3          6.8          7.7             1.6         4.0
##       Yeasts.and.molds Staphylococci Enterobacteriaceae
## n                  3.0           3.0                3.0
## mean               4.1           4.0                3.2
## stdev              0.8           0.6                1.1
## max                4.7           4.7                4.4
## min                3.2           3.7                2.2
## ------------------------------------------------------------ 
## sample: NWC
##       Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n                   3.0          3.0          3.0             3.0         3.0
## mean                8.5          7.8          7.7             2.3         2.8
## stdev               0.7          1.1          1.3             1.2         0.6
## max                 9.1          9.1          9.1             3.0         3.3
## min                 7.7          7.0          6.4             1.0         2.1
##       Yeasts.and.molds Staphylococci Enterobacteriaceae
## n                  3.0           3.0                3.0
## mean               1.2           2.3                0.4
## stdev              2.0           0.4                0.7
## max                3.5           2.8                1.3
## min                0.0           2.0                0.0
## ------------------------------------------------------------ 
## sample: RM
##       Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n                   3.0          3.0          3.0             3.0         3.0
## mean                4.9          4.3          3.1             1.0         2.3
## stdev               0.9          0.9          0.6             0.1         0.3
## max                 5.4          5.3          3.7             1.1         2.7
## min                 3.9          3.5          2.4             0.8         2.0
##       Yeasts.and.molds Staphylococci Enterobacteriaceae
## n                  3.0           3.0                3.0
## mean               1.1           4.3                1.7
## stdev              0.3           0.3                0.3
## max                1.3           4.6                1.9
## min                0.8           4.1                1.3
## ------------------------------------------------------------ 
## sample: RM.H
##       Aerobic.mesophile Streptococci Lactobacilli FH.lactobacilli Enterococci
## n                   3.0          3.0          3.0               3         3.0
## mean                2.1          1.4          0.8               0         0.7
## stdev               0.2          0.2          0.3               0         0.3
## max                 2.4          1.7          1.2               0         1.0
## min                 1.9          1.2          0.5               0         0.4
##       Yeasts.and.molds Staphylococci Enterobacteriaceae
## n                    3             3                3.0
## mean                 0             1                0.3
## stdev                0             0                0.3
## max                  0             1                0.5
## min                  0             1                0.0

Data analysis: RM and eRM

Boxplot

This boxplot shows the spontaneous fermentation effect of RM on the concentration of different microbial groups.

# for boxplots, the dataset in melted form is preferred
df2 <- melt(df, id = "sample", value.name = "CFU_mL")

Statistics

Statistical analysis was performed in order to define whether the evaluated differences were significant. If the variances of the two groups resulted homogeneous to the Fisher’s test, a Two Sample t-test was applied; alternatively a Welch t-test was used.

# variance homogeneity (for interpretation http://www.sthda.com/english/wiki/f-test-compare-two-variances-in-r)
# If p-value>0.05 variances are homogeneous

sample_names <- c("RM", "eRM")

subdf2 <- filter(df2, sample %in% sample_names)
m_vars = unique(subdf2$variable)

for (x in m_vars) {
  test_two_grps(x, subdf2, sample_names)
}
## --------------------------------------------------------------------------------
## Aerobic.mesophile
## 
##  F test to compare two variances
## 
## data:  Aerobic.mesophile RM eRM
## F = 1259.2, num df = 2, denom df = 2, p-value = 0.001587
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##     32.28679 49108.21118
## sample estimates:
## ratio of variances 
##           1259.185
##      ------------------------------------------------------------------
## 
##  Welch Two Sample t-test
## 
## data:  Aerobic.mesophile RM eRM
## t = -8.3527, df = 2.0032, p-value = 0.01397
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -6.533301 -2.095297
## sample estimates:
## mean of x mean of y 
##  4.886033  9.200332
## --------------------------------------------------------------------------------
## Streptococci
## 
##  F test to compare two variances
## 
## data:  Streptococci RM eRM
## F = 19.668, num df = 2, denom df = 2, p-value = 0.09677
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##    0.5042989 767.0386708
## sample estimates:
## ratio of variances 
##           19.66766
##      ------------------------------------------------------------------
## 
##  Two Sample t-test
## 
## data:  Streptococci RM eRM
## t = -8.9479, df = 4, p-value = 0.0008629
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -6.298119 -3.315205
## sample estimates:
## mean of x mean of y 
##  4.297291  9.103953
## --------------------------------------------------------------------------------
## Lactobacilli
## 
##  F test to compare two variances
## 
## data:  Lactobacilli RM eRM
## F = 7.3998, num df = 2, denom df = 2, p-value = 0.2381
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##    0.189739 288.593026
## sample estimates:
## ratio of variances 
##           7.399821
##      ------------------------------------------------------------------
## 
##  Two Sample t-test
## 
## data:  Lactobacilli RM eRM
## t = -15.478, df = 4, p-value = 0.0001017
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -7.079545 -4.926023
## sample estimates:
## mean of x mean of y 
##  3.138678  9.141463
## --------------------------------------------------------------------------------
## FH.lactobacilli
## 
##  F test to compare two variances
## 
## data:  FH.lactobacilli RM eRM
## F = 0.066598, num df = 2, denom df = 2, p-value = 0.1249
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.001707631 2.597307275
## sample estimates:
## ratio of variances 
##         0.06659762
##      ------------------------------------------------------------------
## 
##  Two Sample t-test
## 
## data:  FH.lactobacilli RM eRM
## t = -7.4693, df = 4, p-value = 0.001717
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.520483 -1.612494
## sample estimates:
## mean of x mean of y 
##  1.012043  3.578532
## --------------------------------------------------------------------------------
## Enterococci
## 
##  F test to compare two variances
## 
## data:  Enterococci RM eRM
## F = 0.066201, num df = 2, denom df = 2, p-value = 0.1242
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.001697462 2.581839190
## sample estimates:
## ratio of variances 
##           0.066201
##      ------------------------------------------------------------------
## 
##  Two Sample t-test
## 
## data:  Enterococci RM eRM
## t = -5.6935, df = 4, p-value = 0.004701
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -6.732632 -2.318703
## sample estimates:
## mean of x mean of y 
##  2.308226  6.833894
## --------------------------------------------------------------------------------
## Yeasts.and.molds
## 
##  F test to compare two variances
## 
## data:  Yeasts.and.molds RM eRM
## F = 0.36077, num df = 2, denom df = 2, p-value = 0.5302
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##   0.00925047 14.06996547
## sample estimates:
## ratio of variances 
##          0.3607683
##      ------------------------------------------------------------------
## 
##  Two Sample t-test
## 
## data:  Yeasts.and.molds RM eRM
## t = -12.302, df = 4, p-value = 0.0002508
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.250444 -2.685144
## sample estimates:
## mean of x mean of y 
##  1.135165  4.602959
## --------------------------------------------------------------------------------
## Staphylococci
## 
##  F test to compare two variances
## 
## data:  Staphylococci RM eRM
## F = 0.32397, num df = 2, denom df = 2, p-value = 0.4894
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##   0.008306854 12.634724784
## sample estimates:
## ratio of variances 
##          0.3239673
##      ------------------------------------------------------------------
## 
##  Two Sample t-test
## 
## data:  Staphylococci RM eRM
## t = -7.4707, df = 4, p-value = 0.001716
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.483523 -1.595822
## sample estimates:
## mean of x mean of y 
##  4.251834  6.791506
## --------------------------------------------------------------------------------
## Enterobacteriaceae
## 
##  F test to compare two variances
## 
## data:  Enterobacteriaceae RM eRM
## F = 0.17114, num df = 2, denom df = 2, p-value = 0.2923
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.004388145 6.674369066
## sample estimates:
## ratio of variances 
##          0.1711377
##      ------------------------------------------------------------------
## 
##  Two Sample t-test
## 
## data:  Enterobacteriaceae RM eRM
## t = -9.1823, df = 4, p-value = 0.0007812
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -6.185142 -3.313153
## sample estimates:
## mean of x mean of y 
##  1.715616  6.464764

Data analysis: RM.H, eRM.H and eRM.HS

Statistics

Statistical analysis was performed in order to define whether the evaluated differences were significant. If the variances of the two groups resulted homogeneous to the Fisher’s test, a Two Sample t-test was applied; alternatively a Welch t-test was used.

sample_names <- c("RM.H", "eRM.H")

subdf3 <- filter(df2, sample %in% sample_names)
m_vars = unique(subdf3$variable)

for (x in m_vars) {
  test_two_grps(x, subdf3, sample_names)
}
## --------------------------------------------------------------------------------
## Aerobic.mesophile
## 
##  F test to compare two variances
## 
## data:  Aerobic.mesophile RM.H eRM.H
## F = 0.31509, num df = 2, denom df = 2, p-value = 0.4792
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##   0.008079148 12.288383389
## sample estimates:
## ratio of variances 
##          0.3150868
##      ------------------------------------------------------------------
## 
##  Two Sample t-test
## 
## data:  Aerobic.mesophile RM.H eRM.H
## t = -20.94, df = 4, p-value = 3.074e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -6.49363 -4.97324
## sample estimates:
## mean of x mean of y 
##  2.147064  7.880499
## --------------------------------------------------------------------------------
## Streptococci
## 
##  F test to compare two variances
## 
## data:  Streptococci RM.H eRM.H
## F = 0.14824, num df = 2, denom df = 2, p-value = 0.2582
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.00380113 5.78151875
## sample estimates:
## ratio of variances 
##          0.1482441
##      ------------------------------------------------------------------
## 
##  Two Sample t-test
## 
## data:  Streptococci RM.H eRM.H
## t = -16.409, df = 4, p-value = 8.075e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -7.294157 -5.183017
## sample estimates:
## mean of x mean of y 
##  1.441008  7.679595
## --------------------------------------------------------------------------------
## Lactobacilli
## 
##  F test to compare two variances
## 
## data:  Lactobacilli RM.H eRM.H
## F = 0.074013, num df = 2, denom df = 2, p-value = 0.1378
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.001897766 2.886501454
## sample estimates:
## ratio of variances 
##         0.07401286
##      ------------------------------------------------------------------
## 
##  Two Sample t-test
## 
## data:  Lactobacilli RM.H eRM.H
## t = -9.3259, df = 4, p-value = 0.0007359
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -8.768099 -4.745050
## sample estimates:
## mean of x mean of y 
## 0.8063764 7.5629511
## --------------------------------------------------------------------------------
## FH.lactobacilli
## 
##  F test to compare two variances
## 
## data:  FH.lactobacilli RM.H eRM.H
## F = 0, num df = 2, denom df = 2, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0 0
## sample estimates:
## ratio of variances 
##                  0
##      ------------------------------------------------------------------
## 
##  Welch Two Sample t-test
## 
## data:  FH.lactobacilli RM.H eRM.H
## t = -7.5974, df = 2, p-value = 0.01689
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.777612 -1.322770
## sample estimates:
## mean of x mean of y 
##  0.000000  3.050191
## --------------------------------------------------------------------------------
## Enterococci
## 
##  F test to compare two variances
## 
## data:  Enterococci RM.H eRM.H
## F = 0.2267, num df = 2, denom df = 2, p-value = 0.3696
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.005812843 8.841334385
## sample estimates:
## ratio of variances 
##          0.2267009
##      ------------------------------------------------------------------
## 
##  Two Sample t-test
## 
## data:  Enterococci RM.H eRM.H
## t = -14.306, df = 4, p-value = 0.0001387
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -7.421818 -5.009321
## sample estimates:
## mean of x mean of y 
## 0.7391613 6.9547307
## --------------------------------------------------------------------------------
## Yeasts.and.molds
## 
##  F test to compare two variances
## 
## data:  Yeasts.and.molds RM.H eRM.H
## F = 0, num df = 2, denom df = 2, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0 0
## sample estimates:
## ratio of variances 
##                  0
##      ------------------------------------------------------------------
## 
##  Welch Two Sample t-test
## 
## data:  Yeasts.and.molds RM.H eRM.H
## t = -4.6892, df = 2, p-value = 0.04259
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -7.2665373 -0.3123594
## sample estimates:
## mean of x mean of y 
##  0.000000  3.789448
## --------------------------------------------------------------------------------
## Staphylococci
## 
##  F test to compare two variances
## 
## data:  Staphylococci RM.H eRM.H
## F = 0.0010706, num df = 2, denom df = 2, p-value = 0.002139
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  2.745184e-05 4.175425e-02
## sample estimates:
## ratio of variances 
##        0.001070622
##      ------------------------------------------------------------------
## 
##  Welch Two Sample t-test
## 
## data:  Staphylococci RM.H eRM.H
## t = -9.8263, df = 2.0043, p-value = 0.01013
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -8.054171 -3.155723
## sample estimates:
## mean of x mean of y 
##  1.006372  6.611319
## --------------------------------------------------------------------------------
## Enterobacteriaceae
## 
##  F test to compare two variances
## 
## data:  Enterobacteriaceae RM.H eRM.H
## F = 0.078156, num df = 2, denom df = 2, p-value = 0.145
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.002003998 3.048080460
## sample estimates:
## ratio of variances 
##         0.07815591
##      ------------------------------------------------------------------
## 
##  Two Sample t-test
## 
## data:  Enterobacteriaceae RM.H eRM.H
## t = -11.127, df = 4, p-value = 0.0003712
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -8.125806 -4.880450
## sample estimates:
## mean of x mean of y 
## 0.2816993 6.7848271
sample_names <- c("RM.H", "eRM.HS")

subdf4 <- filter(df2, sample %in% sample_names)
m_vars = unique(subdf4$variable)

for (x in m_vars) {
  test_two_grps(x, subdf4, sample_names)
}
## --------------------------------------------------------------------------------
## Aerobic.mesophile
## 
##  F test to compare two variances
## 
## data:  Aerobic.mesophile RM.H eRM.HS
## F = 0.38303, num df = 2, denom df = 2, p-value = 0.5539
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##   0.009821308 14.938209596
## sample estimates:
## ratio of variances 
##           0.383031
##      ------------------------------------------------------------------
## 
##  Two Sample t-test
## 
## data:  Aerobic.mesophile RM.H eRM.HS
## t = -17.51, df = 4, p-value = 6.246e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5.166273 -3.752135
## sample estimates:
## mean of x mean of y 
##  2.147064  6.606268
## --------------------------------------------------------------------------------
## Streptococci
## 
##  F test to compare two variances
## 
## data:  Streptococci RM.H eRM.HS
## F = 0.039259, num df = 2, denom df = 2, p-value = 0.07555
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.001006646 1.531108560
## sample estimates:
## ratio of variances 
##         0.03925919
##      ------------------------------------------------------------------
## 
##  Two Sample t-test
## 
## data:  Streptococci RM.H eRM.HS
## t = -6.6938, df = 4, p-value = 0.002591
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -6.656113 -2.753282
## sample estimates:
## mean of x mean of y 
##  1.441008  6.145705
## --------------------------------------------------------------------------------
## Lactobacilli
## 
##  F test to compare two variances
## 
## data:  Lactobacilli RM.H eRM.HS
## F = 0.18764, num df = 2, denom df = 2, p-value = 0.316
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.004811186 7.317814009
## sample estimates:
## ratio of variances 
##          0.1876363
##      ------------------------------------------------------------------
## 
##  Two Sample t-test
## 
## data:  Lactobacilli RM.H eRM.HS
## t = -7.8196, df = 4, p-value = 0.001444
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5.070072 -2.413097
## sample estimates:
## mean of x mean of y 
## 0.8063764 4.5479608
## --------------------------------------------------------------------------------
## FH.lactobacilli
## 
##  F test to compare two variances
## 
## data:  FH.lactobacilli RM.H eRM.HS
## F = 0, num df = 2, denom df = 2, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0 0
## sample estimates:
## ratio of variances 
##                  0
##      ------------------------------------------------------------------
## 
##  Welch Two Sample t-test
## 
## data:  FH.lactobacilli RM.H eRM.HS
## t = -1, df = 2, p-value = 0.4226
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.874837  1.167706
## sample estimates:
## mean of x mean of y 
## 0.0000000 0.3535659
## --------------------------------------------------------------------------------
## Enterococci
## 
##  F test to compare two variances
## 
## data:  Enterococci RM.H eRM.HS
## F = 0.24012, num df = 2, denom df = 2, p-value = 0.3873
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.006156912 9.364662884
## sample estimates:
## ratio of variances 
##          0.2401196
##      ------------------------------------------------------------------
## 
##  Two Sample t-test
## 
## data:  Enterococci RM.H eRM.HS
## t = -9.2414, df = 4, p-value = 0.0007622
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5.100919 -2.744015
## sample estimates:
## mean of x mean of y 
## 0.7391613 4.6616279
## --------------------------------------------------------------------------------
## Yeasts.and.molds
## 
##  F test to compare two variances
## 
## data:  Yeasts.and.molds RM.H eRM.HS
## F = 0, num df = 2, denom df = 2, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0 0
## sample estimates:
## ratio of variances 
##                  0
##      ------------------------------------------------------------------
## 
##  Welch Two Sample t-test
## 
## data:  Yeasts.and.molds RM.H eRM.HS
## t = -1.5697, df = 2, p-value = 0.257
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5.027149  2.339544
## sample estimates:
## mean of x mean of y 
##  0.000000  1.343803
## --------------------------------------------------------------------------------
## Staphylococci
## 
##  F test to compare two variances
## 
## data:  Staphylococci RM.H eRM.HS
## F = 0.00079951, num df = 2, denom df = 2, p-value = 0.001598
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  2.050016e-05 3.118074e-02
## sample estimates:
## ratio of variances 
##       0.0007995061
##      ------------------------------------------------------------------
## 
##  Welch Two Sample t-test
## 
## data:  Staphylococci RM.H eRM.HS
## t = -7.6195, df = 2.0032, p-value = 0.01672
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -7.864048 -2.193415
## sample estimates:
## mean of x mean of y 
##  1.006372  6.035103
## --------------------------------------------------------------------------------
## Enterobacteriaceae
## 
##  F test to compare two variances
## 
## data:  Enterobacteriaceae RM.H eRM.HS
## F = 0.4085, num df = 2, denom df = 2, p-value = 0.58
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##   0.01047432 15.93144659
## sample estimates:
## ratio of variances 
##          0.4084986
##      ------------------------------------------------------------------
## 
##  Two Sample t-test
## 
## data:  Enterobacteriaceae RM.H eRM.HS
## t = -11.698, df = 4, p-value = 0.0003054
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.229203 -2.606700
## sample estimates:
## mean of x mean of y 
## 0.2816993 3.6996508

Boxplot

Here is shown the microbial enrichment of RM.H, also with addition of salt.

Statistics

Since here there are more then 2 groups, a one-way ANOVA model was needed for the statistical analysis. The variance homogeneity was previously checked using the Bartlett’s test. In case the assumptions of one-way ANOVA test are not met, a Kruskal-Wallis test is applied. A Multiple Mean Comparison test was eventually performed afterwards.

subdf5 <- filter(df2, sample %in% c("RM.H", "eRM.H", "eRM.HS"))
m_vars = unique(subdf5$variable)

for (x in m_vars) {
  test_more_grps(x, subdf5)
}
## --------------------------------------------------------------------------------
## Aerobic.mesophile
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Aerobic.mesophile
## Bartlett's K-squared = 0.54578, df = 2, p-value = 0.7612
##      ------------------------------------------------------------------
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## sample       2  54.38  27.190   223.1 2.34e-06 ***
## Residuals    6   0.73   0.122                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##      ------------------------------------------------------------------
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = CFU_mL ~ sample)
## 
## $sample
##                   diff       lwr        upr     p adj
## eRM.HS-eRM.H -1.274231 -2.148773 -0.3996888 0.0100609
## RM.H-eRM.H   -5.733435 -6.607977 -4.8588929 0.0000023
## RM.H-eRM.HS  -4.459204 -5.333746 -3.5846623 0.0000102
## --------------------------------------------------------------------------------
## Streptococci
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Streptococci
## Bartlett's K-squared = 3.3822, df = 2, p-value = 0.1843
##      ------------------------------------------------------------------
##             Df Sum Sq Mean Sq F value  Pr(>F)    
## sample       2  63.41   31.70   51.15 0.00017 ***
## Residuals    6   3.72    0.62                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##      ------------------------------------------------------------------

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = CFU_mL ~ sample)
## 
## $sample
##                   diff       lwr        upr     p adj
## eRM.HS-eRM.H -1.533890 -3.506316  0.4385357 0.1180403
## RM.H-eRM.H   -6.238587 -8.211013 -4.2661615 0.0001692
## RM.H-eRM.HS  -4.704697 -6.677123 -2.7322715 0.0008120
## --------------------------------------------------------------------------------
## Lactobacilli
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Lactobacilli
## Bartlett's K-squared = 2.2755, df = 2, p-value = 0.3205
##      ------------------------------------------------------------------
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## sample       2  68.74   34.37   47.89 0.000205 ***
## Residuals    6   4.31    0.72                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##      ------------------------------------------------------------------

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = CFU_mL ~ sample)
## 
## $sample
##                   diff       lwr        upr     p adj
## eRM.HS-eRM.H -3.014990 -5.137313 -0.8926677 0.0113229
## RM.H-eRM.H   -6.756575 -8.878897 -4.6342520 0.0001630
## RM.H-eRM.HS  -3.741584 -5.863907 -1.6192617 0.0039732
## --------------------------------------------------------------------------------
## FH.lactobacilli
## 
##  Bartlett test of homogeneity of variances
## 
## data:  FH.lactobacilli
## Bartlett's K-squared = Inf, df = 2, p-value < 2.2e-16
##      ------------------------------------------------------------------
## 
##  Kruskal-Wallis rank sum test
## 
## data:  FH.lactobacilli
## Kruskal-Wallis chi-squared = 6.72, df = 2, p-value = 0.03474
##      ------------------------------------------------------------------
##   Kruskal-Wallis rank sum test 
##   
##  data: x and g 
##  Kruskal-Wallis chi-squared = 6.72, df = 2, p-value = 0.03 
##   
##   
##                               Comparison of x by g                               
##                                   (Bonferroni)                                   
##  Col Mean-| 
##  Row Mean |      eRM.H     eRM.HS 
##  ---------+---------------------- 
##    eRM.HS |   1.959591 
##           |     0.1501 
##           | 
##      RM.H |   2.449489   0.489897 
##           |    0.0429*     1.0000 
##   
##  alpha = 0.05 
##  Reject Ho if p <= alpha
## --------------------------------------------------------------------------------
## Enterococci
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Enterococci
## Bartlett's K-squared = 0.93453, df = 2, p-value = 0.6267
##      ------------------------------------------------------------------
##             Df Sum Sq Mean Sq F value  Pr(>F)    
## sample       2  59.28  29.639   88.73 3.5e-05 ***
## Residuals    6   2.00   0.334                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##      ------------------------------------------------------------------

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = CFU_mL ~ sample)
## 
## $sample
##                   diff       lwr        upr     p adj
## eRM.HS-eRM.H -2.293103 -3.741002 -0.8452031 0.0067553
## RM.H-eRM.H   -6.215569 -7.663469 -4.7676698 0.0000291
## RM.H-eRM.HS  -3.922467 -5.370366 -2.4745670 0.0004034
## --------------------------------------------------------------------------------
## Yeasts.and.molds
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Yeasts.and.molds
## Bartlett's K-squared = Inf, df = 2, p-value < 2.2e-16
##      ------------------------------------------------------------------
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Yeasts.and.molds
## Kruskal-Wallis chi-squared = 5.8424, df = 2, p-value = 0.05387
## --------------------------------------------------------------------------------
## Staphylococci
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Staphylococci
## Bartlett's K-squared = 9.4948, df = 2, p-value = 0.008674
##      ------------------------------------------------------------------
## 
##  Kruskal-Wallis rank sum test
## 
## data:  Staphylococci
## Kruskal-Wallis chi-squared = 5.4222, df = 2, p-value = 0.06646
## --------------------------------------------------------------------------------
## Enterobacteriaceae
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Enterobacteriaceae
## Bartlett's K-squared = 2.6555, df = 2, p-value = 0.2651
##      ------------------------------------------------------------------
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## sample       2  63.49   31.75   78.93 4.91e-05 ***
## Residuals    6   2.41    0.40                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##      ------------------------------------------------------------------

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = CFU_mL ~ sample)
## 
## $sample
##                   diff       lwr       upr     p adj
## eRM.HS-eRM.H -3.085176 -4.673958 -1.496395 0.0024235
## RM.H-eRM.H   -6.503128 -8.091909 -4.914346 0.0000384
## RM.H-eRM.HS  -3.417951 -5.006733 -1.829170 0.0014148

Data analysis: NWC and final cultures

Boxplot

This boxplot shows the microbial composition of the final cultures, obtained by incubating the mixture of NWC and the enriched milks.

Testing hypotheses

Since here there are more then 2 groups, a one-way ANOVA model was needed for the statistical analysis. The variance homogeneity was previously checked using the Bartlett’s test. In case the assumptions of one-way ANOVA test are not met, a Kruskal-Wallis test is applied. A Multiple Mean Comparison test was eventually performed afterwards.

selected_samples <- c("NWC", "eRWC.y", "eRWC.o", "eRWC.H.y", "eRWC.H.o", "eRWC.HS.y", "eRWC.HS.o")

subdf6 <- filter(df2, sample %in% selected_samples)
m_vars = unique(subdf6$variable)

for (x in m_vars) {
  test_more_grps(x, subdf6)
}
## --------------------------------------------------------------------------------
## Aerobic.mesophile
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Aerobic.mesophile
## Bartlett's K-squared = 0.66898, df = 6, p-value = 0.9951
##      ------------------------------------------------------------------
##             Df Sum Sq Mean Sq F value Pr(>F)
## sample       6  6.232  1.0386   1.615  0.215
## Residuals   14  9.005  0.6432
## --------------------------------------------------------------------------------
## Streptococci
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Streptococci
## Bartlett's K-squared = 5.1667, df = 6, p-value = 0.5226
##      ------------------------------------------------------------------
##             Df Sum Sq Mean Sq F value Pr(>F)
## sample       6   3.93  0.6550   0.797  0.588
## Residuals   14  11.50  0.8217
## --------------------------------------------------------------------------------
## Lactobacilli
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Lactobacilli
## Bartlett's K-squared = 5.5485, df = 6, p-value = 0.4756
##      ------------------------------------------------------------------
##             Df Sum Sq Mean Sq F value Pr(>F)
## sample       6  1.978  0.3296   0.251  0.951
## Residuals   14 18.379  1.3128
## --------------------------------------------------------------------------------
## FH.lactobacilli
## 
##  Bartlett test of homogeneity of variances
## 
## data:  FH.lactobacilli
## Bartlett's K-squared = 4.4453, df = 6, p-value = 0.6166
##      ------------------------------------------------------------------
##             Df Sum Sq Mean Sq F value Pr(>F)
## sample       6  4.457  0.7429   0.677  0.671
## Residuals   14 15.372  1.0980
## --------------------------------------------------------------------------------
## Enterococci
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Enterococci
## Bartlett's K-squared = 8.6924, df = 6, p-value = 0.1916
##      ------------------------------------------------------------------
##             Df Sum Sq Mean Sq F value Pr(>F)  
## sample       6  33.15   5.525    4.21 0.0125 *
## Residuals   14  18.37   1.312                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##      ------------------------------------------------------------------
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = CFU_mL ~ sample)
## 
## $sample
##                            diff        lwr        upr     p adj
## eRWC.H.y-eRWC.H.o   -0.28049937 -3.4745060  2.9135072 0.9999160
## eRWC.HS.o-eRWC.H.o  -2.04331342 -5.2373200  1.1506932 0.3609038
## eRWC.HS.y-eRWC.H.o  -2.85237603 -6.0463826  0.3416306 0.0946776
## eRWC.o-eRWC.H.o     -0.65004973 -3.8440564  2.5439569 0.9907539
## eRWC.y-eRWC.H.o     -0.36719217 -3.5611988  2.8268145 0.9996009
## NWC-eRWC.H.o        -3.32247364 -6.5164803 -0.1284670 0.0391093
## eRWC.HS.o-eRWC.H.y  -1.76281405 -4.9568207  1.4311926 0.5195760
## eRWC.HS.y-eRWC.H.y  -2.57187665 -5.7658833  0.6221300 0.1560139
## eRWC.o-eRWC.H.y     -0.36955036 -3.5635570  2.8244563 0.9995860
## eRWC.y-eRWC.H.y     -0.08669279 -3.2806994  3.1073138 0.9999999
## NWC-eRWC.H.y        -3.04197427 -6.2359809  0.1520323 0.0666390
## eRWC.HS.y-eRWC.HS.o -0.80906261 -4.0030692  2.3849440 0.9723376
## eRWC.o-eRWC.HS.o     1.39326369 -1.8007429  4.5872703 0.7464837
## eRWC.y-eRWC.HS.o     1.67612125 -1.5178854  4.8701279 0.5730612
## NWC-eRWC.HS.o       -1.27916022 -4.4731668  1.9148464 0.8095251
## eRWC.o-eRWC.HS.y     2.20232629 -0.9916803  5.3963329 0.2854987
## eRWC.y-eRWC.HS.y     2.48518386 -0.7088228  5.6791905 0.1809168
## NWC-eRWC.HS.y       -0.47009762 -3.6641042  2.7239090 0.9983891
## eRWC.y-eRWC.o        0.28285757 -2.9111491  3.4768642 0.9999118
## NWC-eRWC.o          -2.67242391 -5.8664305  0.5215827 0.1308671
## NWC-eRWC.y          -2.95528148 -6.1492881  0.2387251 0.0783336
## --------------------------------------------------------------------------------
## Yeasts.and.molds
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Yeasts.and.molds
## Bartlett's K-squared = 7.1439, df = 6, p-value = 0.3077
##      ------------------------------------------------------------------
##             Df Sum Sq Mean Sq F value Pr(>F)  
## sample       6  48.08   8.013   3.237 0.0329 *
## Residuals   14  34.66   2.476                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##      ------------------------------------------------------------------

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = CFU_mL ~ sample)
## 
## $sample
##                             diff       lwr       upr     p adj
## eRWC.H.y-eRWC.H.o   -1.264466193 -5.651305  3.122373 0.9494909
## eRWC.HS.o-eRWC.H.o  -1.442574233 -5.829413  2.944265 0.9105900
## eRWC.HS.y-eRWC.H.o  -3.117058388 -7.503898  1.269781 0.2572630
## eRWC.o-eRWC.H.o     -0.007373058 -4.394212  4.379466 1.0000000
## eRWC.y-eRWC.H.o     -1.559289033 -5.946128  2.827550 0.8776525
## NWC-eRWC.H.o        -4.526231162 -8.913070 -0.139392 0.0411874
## eRWC.HS.o-eRWC.H.y  -0.178108041 -4.564947  4.208731 0.9999991
## eRWC.HS.y-eRWC.H.y  -1.852592195 -6.239431  2.534247 0.7717983
## eRWC.o-eRWC.H.y      1.257093135 -3.129746  5.643932 0.9508071
## eRWC.y-eRWC.H.y     -0.294822841 -4.681662  4.092016 0.9999825
## NWC-eRWC.H.y        -3.261764969 -7.648604  1.125074 0.2172994
## eRWC.HS.y-eRWC.HS.o -1.674484155 -6.061323  2.712355 0.8396806
## eRWC.o-eRWC.HS.o     1.435201176 -2.951638  5.822040 0.9124744
## eRWC.y-eRWC.HS.o    -0.116714800 -4.503554  4.270124 0.9999999
## NWC-eRWC.HS.o       -3.083656929 -7.470496  1.303182 0.2672361
## eRWC.o-eRWC.HS.y     3.109685330 -1.277154  7.496524 0.2594401
## eRWC.y-eRWC.HS.y     1.557769355 -2.829070  5.944608 0.8781182
## NWC-eRWC.HS.y       -1.409172774 -5.796012  2.977666 0.9189370
## eRWC.y-eRWC.o       -1.551915976 -5.938755  2.834923 0.8799031
## NWC-eRWC.o          -4.518858104 -8.905697 -0.132019 0.0416135
## NWC-eRWC.y          -2.966942129 -7.353781  1.419897 0.3043144
## --------------------------------------------------------------------------------
## Staphylococci
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Staphylococci
## Bartlett's K-squared = 6.3221, df = 6, p-value = 0.3881
##      ------------------------------------------------------------------
##             Df Sum Sq Mean Sq F value Pr(>F)  
## sample       6   29.4   4.901   3.965 0.0158 *
## Residuals   14   17.3   1.236                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##      ------------------------------------------------------------------

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = CFU_mL ~ sample)
## 
## $sample
##                           diff        lwr        upr     p adj
## eRWC.H.y-eRWC.H.o    2.2577275 -0.8416620  5.3571170 0.2349343
## eRWC.HS.o-eRWC.H.o  -1.7374481 -4.8368376  1.3619414 0.5027973
## eRWC.HS.y-eRWC.H.o  -0.3140636 -3.4134532  2.7853259 0.9998070
## eRWC.o-eRWC.H.o     -0.1088761 -3.2082657  2.9905134 0.9999996
## eRWC.y-eRWC.H.o      0.7616601 -2.3377294  3.8610496 0.9761075
## NWC-eRWC.H.o        -0.9986391 -4.0980286  2.1007504 0.9178924
## eRWC.HS.o-eRWC.H.y  -3.9951756 -7.0945652 -0.8957861 0.0083527
## eRWC.HS.y-eRWC.H.y  -2.5717911 -5.6711807  0.5275984 0.1360737
## eRWC.o-eRWC.H.y     -2.3666037 -5.4659932  0.7327859 0.1955352
## eRWC.y-eRWC.H.y     -1.4960674 -4.5954569  1.6033221 0.6569113
## NWC-eRWC.H.y        -3.2563666 -6.3557561 -0.1569771 0.0366836
## eRWC.HS.y-eRWC.HS.o  1.4233845 -1.6760051  4.5227740 0.7028500
## eRWC.o-eRWC.HS.o     1.6285720 -1.4708176  4.7279615 0.5717119
## eRWC.y-eRWC.HS.o     2.4991082 -0.6002813  5.5984977 0.1550668
## NWC-eRWC.HS.o        0.7388090 -2.3605805  3.8381985 0.9794171
## eRWC.o-eRWC.HS.y     0.2051875 -2.8942020  3.3045770 0.9999840
## eRWC.y-eRWC.HS.y     1.0757237 -2.0236658  4.1751133 0.8886383
## NWC-eRWC.HS.y       -0.6845755 -3.7839650  2.4148141 0.9859347
## eRWC.y-eRWC.o        0.8705362 -2.2288533  3.9699258 0.9550925
## NWC-eRWC.o          -0.8897630 -3.9891525  2.2096266 0.9504049
## NWC-eRWC.y          -1.7602992 -4.8596887  1.3390903 0.4886678
## --------------------------------------------------------------------------------
## Enterobacteriaceae
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Enterobacteriaceae
## Bartlett's K-squared = 2.7228, df = 6, p-value = 0.8427
##      ------------------------------------------------------------------
##             Df Sum Sq Mean Sq F value Pr(>F)  
## sample       6  50.14   8.357   3.368 0.0287 *
## Residuals   14  34.74   2.482                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##      ------------------------------------------------------------------

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = CFU_mL ~ sample)
## 
## $sample
##                            diff       lwr          upr     p adj
## eRWC.H.y-eRWC.H.o    1.21248499 -3.179522  5.604492081 0.9585151
## eRWC.HS.o-eRWC.H.o  -1.58228721 -5.974294  2.809719879 0.8710773
## eRWC.HS.y-eRWC.H.o  -2.06572471 -6.457732  2.326282379 0.6812679
## eRWC.o-eRWC.H.o      1.18780958 -3.204198  5.579816665 0.9622874
## eRWC.y-eRWC.H.o     -0.46109591 -4.853103  3.930911180 0.9997632
## NWC-eRWC.H.o        -3.21047179 -7.602479  1.181535292 0.2318928
## eRWC.HS.o-eRWC.H.y  -2.79477220 -7.186779  1.597234885 0.3664833
## eRWC.HS.y-eRWC.H.y  -3.27820970 -7.670217  1.113797384 0.2140684
## eRWC.o-eRWC.H.y     -0.02467542 -4.416683  4.367331671 1.0000000
## eRWC.y-eRWC.H.y     -1.67358090 -6.065588  2.718426186 0.8406891
## NWC-eRWC.H.y        -4.42295679 -8.814964 -0.030949702 0.0479018
## eRWC.HS.y-eRWC.HS.o -0.48343750 -4.875445  3.908569587 0.9996890
## eRWC.o-eRWC.HS.o     2.77009679 -1.621910  7.162103873 0.3758113
## eRWC.y-eRWC.HS.o     1.12119130 -3.270816  5.513198389 0.9712887
## NWC-eRWC.HS.o       -1.62818459 -6.020192  2.763822501 0.8562009
## eRWC.o-eRWC.HS.y     3.25353429 -1.138473  7.645541374 0.2204311
## eRWC.y-eRWC.HS.y     1.60462880 -2.787378  5.996635889 0.8639396
## NWC-eRWC.HS.y       -1.14474709 -5.536754  3.247260001 0.9682985
## eRWC.y-eRWC.o       -1.64890548 -6.040913  2.743101603 0.8492166
## NWC-eRWC.o          -4.39828137 -8.790288 -0.006274285 0.0495677
## NWC-eRWC.y          -2.74937589 -7.141383  1.642631199 0.3837524

16S rRNA gene amplicon sequencing - cultures

This R script will analyze 16S results from ASV counts to graphical visualization mainly using phyloseq http://joey711.github.io/phyloseq/import-data.html

Cultures

# import raw sequences that were already quality filtered as described in Dreier et al 2022
fp <- file.path(getwd(), "..", "data", "cultures", "ngs", "ASV_table.xlsx")
cult_physeq <- load_ngs_data(fp)
# Counts to relative abundance
# ASV counts transformed to relative abundance (%)
# Note: raw sequences were already quality filtered as described in Dreier et al 2022
# thus taxa are not further trimmed for ordination and diversity analysis
# since even low abundant species are informative
cult_physeqR  = transform_sample_counts(cult_physeq, function(x) x/sum(x)*100 )
# font sizes for plots
yaxis_tick_label_size = 10
yaxis_label_size = 10

title_size = 14
subtitle_size = 14
xaxis_tick_label_size = 12

Diversity

# Summarize diversity values in table
metadata <- as(sample_data(cult_physeq), "data.frame")
richness_table <- estimate_richness(cult_physeq)
richness_table["Sample"] <- metadata["sample_subtype"]
richness_table["replicate"] <- metadata["replicate"]
# Reorder columns
richness_table <- richness_table[, c(10, 11, 1, 2, 3, 4, 5, 6, 7, 8 , 9)]
richness_table
##                Sample replicate Observed    Chao1  se.chao1       ACE    se.ACE
## RM.2               RM         1      187 187.3333 0.9257269 187.45666 5.5240883
## NWC.2             NWC         1        8   8.0000 0.0000000   8.00000 1.2247449
## eRM.2             eRM         1       33  33.0000 0.2461830  33.38948 2.2959662
## eRM.H.2         eRM.H         1       27  27.0000 0.0000000  27.00000 1.6329932
## eRM.HS.2       eRM.HS         1       63  63.0000 0.0000000  63.00000 3.7838420
## eRWC.y.2       eRWC.y         1       19  19.0000 0.0000000  19.00000 1.3377121
## eRWC.H.y.2   eRWC.H.y         1       27  27.0000 0.1635511  27.57143 1.6471079
## eRWC.HS.y.2 eRWC.HS.y         1       13  13.0000 0.0000000  13.00000 1.3008873
## eRWC.o.2       eRWC.o         1       27  27.0000 0.0000000  27.00000 2.0184336
## eRWC.H.o.2   eRWC.H.o         1       29  29.0000 0.4913037  29.32346 2.1534176
## eRWC.HS.o.2 eRWC.HS.o         1       19  19.0000 0.0000000  19.00000 1.3377121
## RM.3               RM         2      178 178.0000 0.0000000 178.00000 2.2044388
## NWC.3             NWC         2       12  12.0000 0.0000000  12.00000 0.9574271
## eRM.3             eRM         2       25  25.0000 0.4898979  25.22141 2.1781484
## eRM.H.3         eRM.H         2       34  34.0000 0.1641974  34.65021 1.7217992
## eRM.HS.3       eRM.HS         2       38  38.0000 0.0000000  38.00000 2.7956733
## eRWC.y.3       eRWC.y         2       30  30.0000 0.2457980  30.66233 2.2014708
## eRWC.H.y.3   eRWC.H.y         2       24  25.0000 2.3108440  24.96505 2.2482432
## eRWC.HS.y.3 eRWC.HS.y         2       22  22.0000 0.1221261  22.71096 2.2389837
## eRWC.o.3       eRWC.o         2       31  31.0000 0.0000000  31.00000 1.6461098
## eRWC.H.o.3   eRWC.H.o         2       21  21.0000 0.0000000  21.00000 1.7994708
## eRWC.HS.o.3 eRWC.HS.o         2       25  25.0000 0.0000000  25.00000 1.3564660
## RM.4               RM         3      125 125.0000 0.4979960 125.18280 4.3009922
## NWC.4             NWC         3        9   9.0000 0.0000000   9.00000 0.9428090
## eRM.4             eRM         3       31  31.0000 0.4918694  31.51953 1.6129265
## eRM.H.4         eRM.H         3       48  48.0000 0.0000000  48.00000 0.9895285
## eRM.HS.4       eRM.HS         3       40  40.0000 0.4937104  40.25598 2.1854976
## eRWC.y.4       eRWC.y         3       30  30.0000 0.0000000  30.00000 2.5099801
## eRWC.H.y.4   eRWC.H.y         3       26  26.0000 0.0000000  26.00000 2.4806947
## eRWC.HS.y.4 eRWC.HS.y         3       18  19.0000 2.2998856  18.74896 2.0054355
## eRWC.o.4       eRWC.o         3       28  28.0000 0.0000000  28.00000 1.8516402
## eRWC.H.o.4   eRWC.H.o         3       25  25.0000 0.2449490  25.51689 2.1424747
## eRWC.HS.o.4 eRWC.HS.o         3       23  23.0000 0.0000000  23.00000 1.8177865
##               Shannon    Simpson InvSimpson     Fisher
## RM.2        3.6340335 0.90893777  10.981501 28.4912451
## NWC.2       0.2132335 0.08356654   1.091187  0.6995682
## eRM.2       1.7778360 0.75780040   4.128826  3.6674951
## eRM.H.2     2.1073510 0.82641140   5.760747  2.9650366
## eRM.HS.2    0.8408825 0.32036464   1.471377  6.3968941
## eRWC.y.2    0.6092499 0.23636872   1.309533  1.8483752
## eRWC.H.y.2  0.7112148 0.28082089   1.390474  2.6807118
## eRWC.HS.y.2 0.3231377 0.12569873   1.143770  1.0803211
## eRWC.o.2    1.3189694 0.59890631   2.493183  2.5140674
## eRWC.H.o.2  0.9781383 0.40779791   1.688613  2.7313235
## eRWC.HS.o.2 1.1188118 0.58937175   2.435293  1.7195216
## RM.3        3.4430962 0.89258676   9.309839 21.1232946
## NWC.3       0.3346318 0.13629215   1.157799  1.0285560
## eRM.3       0.4885485 0.17391688   1.210532  2.5633656
## eRM.H.3     2.2477054 0.82588566   5.743352  3.3467815
## eRM.HS.3    1.8897852 0.81178899   5.313185  3.6397647
## eRWC.y.3    0.5983486 0.28911774   1.406703  2.7803261
## eRWC.H.y.3  0.4232009 0.17189234   1.207572  2.2030228
## eRWC.HS.y.3 0.5881647 0.26935073   1.368646  1.9181873
## eRWC.o.3    1.5189914 0.68263234   3.150920  2.8420564
## eRWC.H.o.3  1.1320277 0.47805487   1.915910  1.8527952
## eRWC.HS.o.3 1.5286237 0.71463170   3.504243  2.2253392
## RM.4        2.3087924 0.78350260   4.618993 13.9592564
## NWC.4       0.2276936 0.08177288   1.089055  0.7896196
## eRM.4       0.4034391 0.12115649   1.137859  3.0807798
## eRM.H.4     2.1397921 0.78753973   4.706762  4.6158258
## eRM.HS.4    0.8119889 0.30681802   1.442623  3.7345737
## eRWC.y.4    0.2836259 0.09028779   1.099249  2.9459226
## eRWC.H.y.4  0.3643399 0.12334999   1.140706  2.5463340
## eRWC.HS.y.4 0.3116122 0.11367379   1.128253  1.6641025
## eRWC.o.4    0.4497640 0.14913289   1.175272  2.7356668
## eRWC.H.o.4  0.4514010 0.15689793   1.186096  2.4200027
## eRWC.HS.o.4 0.5752884 0.23815353   1.312600  2.1386689
# write to excel
write.xlsx(richness_table, '../tables/cultures_richness_table.xlsx', colNames = TRUE, rowNames = TRUE, asTable = FALSE, keepNA = FALSE, na.string = "NaN")

# Beta diversity Permanova for all samples
adonis2(distance(cult_physeq, method="bray") ~ sample_subtype, data = metadata, permutations = 9999)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = distance(cult_physeq, method = "bray") ~ sample_subtype, data = metadata, permutations = 9999)
##                Df SumOfSqs      R2      F Pr(>F)  
## sample_subtype 10   5.1665 0.41167 1.5394 0.0176 *
## Residual       22   7.3837 0.58833                
## Total          32  12.5502 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# filter for cultures
cultures_only_phy <- subset_samples(cult_physeq, sample_type == "eRWC")
# cultures metadata
c_metadata <- as(sample_data(cultures_only_phy), "data.frame")
# Beta diversity Permanova
adonis2(distance(cultures_only_phy, method="bray") ~ sample_subtype, data = c_metadata, permutations = 9999)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = distance(cultures_only_phy, method = "bray") ~ sample_subtype, data = c_metadata, permutations = 9999)
##                Df SumOfSqs      R2      F Pr(>F)
## sample_subtype  5   1.1375 0.23204 0.7252 0.7015
## Residual       12   3.7647 0.76796              
## Total          17   4.9022 1.00000

Diversity plot

Relative abundances

# percentage thresholds used for filtering
abund_thresh <- 1.0
sp_res_th <- 0.6

# Taxa agglomeration----
# Differently to Ord. & Div. plots, in bar charts a taxa filter is necessary to show the most abundant species
# Let's agglomerate at the Species level using the tax_glom function from phyloseq
# It requires "Kingdom" to be the first column of our tax_table
# (see probl. solved https://github.com/joey711/phyloseq/issues/1551)
tax_table(cult_physeqR) <- tax_table(cult_physeqR)[,c(2:10,1)] # Re-order the columns

# Let's agglomerate at the Species level
# i.e. different ASV belonging to the same species will be agglomerated
# their relative abundance will be summed
# further the data frame is melted to long format for filtering and plotting
cult_physeqR_tax <- cult_physeqR %>%
  tax_glom(taxrank = "Species") %>%
  psmelt()

# Select only the natural whey and the enriched raw milk-whey culture samples 
cult_physeqR_tax_samples = subset(cult_physeqR_tax, sample_type %in% c("eRWC", "NWC"))

# create a mapper to replace old lactobacilli names in table
fp = file.path(getwd(), "..", "data", "species_dict.json")
species_dict <- fromJSON(file = fp, simplify = TRUE)

# In order to get groups in the plots add a group category and rename replicates
cult_physeqR_tax_samples <- cult_physeqR_tax_samples %>%
  mutate(sample_subtype = substr(sample_ID, 1, nchar(sample_ID) - 2))  %>%
  mutate(replicate = replicate - 1)

# For plotting it looks nicer when the species names are separated by spaces
cult_physeqR_tax_samples <- cult_physeqR_tax_samples %>%
    mutate(Species = str_replace(Species, "_", " ")) %>%
    # update new lactobacilli names
    mutate(Species = recode_factor(Species, !!!species_dict)) %>%
    # change Species to sp. for unknown species names
    mutate(Species = str_replace(Species, "Species", "sp."))

# group data frame by Species, calculate mean rel. abundance
means <- ddply(cult_physeqR_tax_samples, ~Species, function(x) c(mean = mean(x$Abundance)))

# get the species names with high and low average relative abundance, respectively
# Species whose rel. abundance is higher than 1% (or threshold [abund_thresh] defined above)
high_ab_sp <- means[means$mean >= abund_thresh, "Species"]
# Species whose rel. abundance is less than 1%
low_ab_sp <- means[means$mean < abund_thresh, "Species"]

# Create two data frames with high and low avg. abundances
cult_physeqR_tax_high <- cult_physeqR_tax_samples[cult_physeqR_tax_samples$Species %in% high_ab_sp,]
cult_physeqR_tax_low <- cult_physeqR_tax_samples[cult_physeqR_tax_samples$Species %in% low_ab_sp,]

low_abund <- cult_physeqR_tax_low
lowabund_str <- paste(c("<", abund_thresh, "% avg. abundance"), collapse = " ")
low_abund$Species <- lowabund_str
cult_physeqR_tax_high_full <- rbind(cult_physeqR_tax_high, low_abund)


# convert Species to a character vector from a factor because R
cult_physeqR_tax_low$Species <- as.character(cult_physeqR_tax_low$Species)

# group data frame by Species, calculate max rel. abundance
maxs <- ddply(cult_physeqR_tax_low, ~Species, function(x) c(max = max(x$Abundance)))

# find Species whose max. abundance is less than 0.6% [threshold "sp_res_th" defined above]
remainder <- maxs[maxs$max <= sp_res_th,]$Species

# change their name to "Other species"
cult_physeqR_tax_low[cult_physeqR_tax_low$Species %in% remainder,]$Species <- 'Other species'
# create nicely formatted guides with the species names
ordered_species_list <- ordered_species_guides(cult_physeqR_tax_low$Species, "Other species")
ordered_species <- ordered_species_list[[1]]
ordered_sp_labels <- ordered_species_list[[2]]

ha_ordered_species_list <- ordered_species_guides(cult_physeqR_tax_high$Species, lowabund_str)
ha_ordered_species <- ha_ordered_species_list[[1]]
ha_ordered_sp_labels <- ha_ordered_species_list[[2]]

# sample_ID levels
sampleid_vector <- c("NWC.2", "NWC.3", "NWC.4",
                     "eRWC.y.2", "eRWC.y.3", "eRWC.y.4",
                     "eRWC.o.2", "eRWC.o.3", "eRWC.o.4",
                     "eRWC.H.y.2", "eRWC.H.y.3", "eRWC.H.y.4",
                     "eRWC.H.o.2", "eRWC.H.o.3", "eRWC.H.o.4",
                     "eRWC.HS.y.2", "eRWC.HS.y.3", "eRWC.HS.y.4",
                     "eRWC.HS.o.2", "eRWC.HS.o.3", "eRWC.HS.o.4")

# Colors are set manually
species_colors <- c("#88CCEE", "#CC6677", "#DDCC77", "#117733", "#332288", "#AA4499", 
                    "#44AA99", "#999933", "#882255", "#661100", "#6699CC", "#888888")

# Define font size for both plots here
yaxis_tick_label_size = 10
yaxis_label_size = 10
xaxis_tick_label_size = 12
Xaxis_rot = 0
df_sorter = c("Species", "sample_subtype")
x_labels = c(".1",".2",".3")

# High abundant species plot
p1 <- draw_ha_stacked_plot(df = cult_physeqR_tax_high_full,
                           sampleid_vector = sampleid_vector,
                           x_factor = "sample_ID",
                           x_labels = x_labels,
                           species_colors = species_colors, 
                           df_sorter = df_sorter,
                           wrap_group = c("~sample_subtype"),
                           ordered_sp_labels = ha_ordered_sp_labels,
                           abund_thresh = abund_thresh,
                           sp_res_th = sp_res_th,
                           ordered_species = ha_ordered_species
                           )

# Low abundant species plot
p2 <- draw_la_stacked_plot(df = cult_physeqR_tax_low,
                           sampleid_vector = sampleid_vector,
                           x_labels = x_labels,
                           species_colors = species_colors,
                           ordered_sp_labels = ordered_sp_labels,
                           x_factor = "sample_ID",
                           ordered_species = ordered_species,
                           wrap_group = c("~sample_subtype"),
                           abund_thresh = abund_thresh,
                           sp_res_th = sp_res_th
                           )

16S rRNA gene amplicon sequencing - cheese

This R script will analyze 16S results from ASV counts to graphical visualization mainly using phyloseq http://joey711.github.io/phyloseq/import-data.html

Cheese

cheese_fp <- file.path(getwd(), "..", "data", "cheese", "ngs", "ASV_table.xlsx")
cheese_physeq = load_ngs_data(cheese_fp)
# remove thermized controls
cheese_physeq = subset_samples(cheese_physeq, sample_ID != "K302_DNA" & sample_ID != "K310_DNA") 
# since we removed the thermized sample, there may be taxa with count = 0
# this will eventually remove them
cheese_physeq <- prune_taxa(taxa_sums(cheese_physeq) > 1, cheese_physeq)
# get relative abundance data
cheese_physeqR <- transform_sample_counts(cheese_physeq, function(x) x/sum(x)*100 )
# font sizes for plots
yaxis_tick_label_size = 10
yaxis_label_size = 10

title_size = 14
subtitle_size = 14
xaxis_tick_label_size = 12

Distance Ordination Analysis

Cultures

# bray = "Bray-Curtis dissimilarity"
# NMDS = Non-metric Multidimensional Scaling
# import raw sequences that were already quality filtered as described in Dreier et al 2022
fp_culture <- file.path(getwd(), "..", "data", "cultures", "ngs", "ASV_table.xlsx")
cult_physeq <- load_ngs_data(fp_culture)
cult_physeqR  = transform_sample_counts(cult_physeq, function(x) x/sum(x)*100 )
ordi = ordinate(cult_physeqR, method = "NMDS", distance = "bray")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2108115 
## Run 1 stress 0.2088326 
## ... New best solution
## ... Procrustes: rmse 0.1329443  max resid 0.3750953 
## Run 2 stress 0.2352694 
## Run 3 stress 0.215075 
## Run 4 stress 0.1980362 
## ... New best solution
## ... Procrustes: rmse 0.04436881  max resid 0.2204983 
## Run 5 stress 0.2480068 
## Run 6 stress 0.2404204 
## Run 7 stress 0.222634 
## Run 8 stress 0.2270414 
## Run 9 stress 0.2149473 
## Run 10 stress 0.2272627 
## Run 11 stress 0.2138737 
## Run 12 stress 0.1990086 
## Run 13 stress 0.2672146 
## Run 14 stress 0.220576 
## Run 15 stress 0.2263959 
## Run 16 stress 0.1985512 
## Run 17 stress 0.2297981 
## Run 18 stress 0.1990155 
## Run 19 stress 0.2144827 
## Run 20 stress 0.2328488 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##      1: no. of iterations >= maxit
##     16: stress ratio > sratmax
##      3: scale factor of the gradient < sfgrmin
dnmds_plt <- plot_ordination(cult_physeqR, ordi, "sample_ID", color = "sample_type")
# replace Na with "None" in milk_treatment factor
NMDS_df <- dnmds_plt$data %>% replace_na(list(milk_treatment = "None"))
# plot with all the sample_types
cult_p_ord <- ggplot(NMDS_df,
                 aes(NMDS1, NMDS2,
                     color = milk_treatment,
                     shape = sample_type)) +
          geom_point(size = 6) +
          scale_color_manual(values = c("H" = "#117733",
                                        "HS" = "#332288",
                                        "None" = "#AA4499")) +
          labs(title = "NMDS (Bray-Curtis), milk & cultures") +
          guides(color = guide_legend(order = 1), 
                 shape = guide_legend(order = 2)) +
          theme(text = element_text(size = xaxis_tick_label_size),
                plot.title = element_text(size = title_size, hjust = 0.5, )) +
          geom_text(mapping = aes(label = culture_type), color = 001,
                    vjust = 0, hjust = 1.5, size = xaxis_tick_label_size/2)

Cheese

# bray = "Bray-Curtis dissimilarity"
# NMDS = Non-metric MultiDimentional Scaling
ordi = ordinate(cheese_physeqR, method = "NMDS", distance = "bray")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1336254 
## Run 1 stress 0.1336254 
## ... Procrustes: rmse 5.248895e-05  max resid 0.0001167245 
## ... Similar to previous best
## Run 2 stress 0.1336254 
## ... Procrustes: rmse 0.0001015451  max resid 0.0002381264 
## ... Similar to previous best
## Run 3 stress 0.1336254 
## ... New best solution
## ... Procrustes: rmse 4.117213e-06  max resid 8.702852e-06 
## ... Similar to previous best
## Run 4 stress 0.1437675 
## Run 5 stress 0.1799776 
## Run 6 stress 0.1336254 
## ... Procrustes: rmse 6.414714e-05  max resid 0.0001440501 
## ... Similar to previous best
## Run 7 stress 0.1336254 
## ... Procrustes: rmse 4.504085e-05  max resid 9.1842e-05 
## ... Similar to previous best
## Run 8 stress 0.1336254 
## ... Procrustes: rmse 3.813832e-05  max resid 8.119956e-05 
## ... Similar to previous best
## Run 9 stress 0.1903222 
## Run 10 stress 0.1336254 
## ... Procrustes: rmse 0.0001145622  max resid 0.000254667 
## ... Similar to previous best
## Run 11 stress 0.1336254 
## ... Procrustes: rmse 2.327019e-05  max resid 5.289384e-05 
## ... Similar to previous best
## Run 12 stress 0.1336254 
## ... Procrustes: rmse 8.042166e-05  max resid 0.0001795095 
## ... Similar to previous best
## Run 13 stress 0.1336254 
## ... Procrustes: rmse 0.0001050267  max resid 0.0002551261 
## ... Similar to previous best
## Run 14 stress 0.1741222 
## Run 15 stress 0.1741986 
## Run 16 stress 0.1437675 
## Run 17 stress 0.2331284 
## Run 18 stress 0.1336254 
## ... Procrustes: rmse 2.773221e-05  max resid 5.694338e-05 
## ... Similar to previous best
## Run 19 stress 0.1336254 
## ... Procrustes: rmse 1.341918e-05  max resid 2.672063e-05 
## ... Similar to previous best
## Run 20 stress 0.2119651 
## *** Best solution repeated 10 times
dnmds_plt <- plot_ordination(cheese_physeqR, ordi, "sample_ID", color = "milk_treatment")

NMDS_df <- dnmds_plt$data
# plot with all the sample_types
ch_p_ord = ggplot(NMDS_df,
                  aes(NMDS1, NMDS2,
                      color = milk_treatment,
                      shape = culture_type)) +
                  geom_point(size = 6) +
                  scale_color_manual(values = c("H" = "#117733",
                                                "HS" = "#332288",
                                                "None" = "#AA4499",
                                                "Control" = "#888888")) +
                  labs(title = "NMDS (Bray-Curtis), cheese 120d") +
                  guides(color = guide_legend(order = 1), 
                         shape = guide_legend(order = 2)) +
                  theme(text = element_text(size = xaxis_tick_label_size),
                        plot.title = element_text(size = title_size, hjust = 0.5)) + 
                  geom_text(mapping = aes(label = production_day), color = 001,
                          vjust = 0.5, hjust = 1.75, size = xaxis_tick_label_size/2)
## Warning: Removed 15 rows containing missing values (`geom_text()`).
## Removed 15 rows containing missing values (`geom_text()`).

Diversity

# Diversity parameters plot----
# Summarize alpha-diversity values in table
metadata <- as(sample_data(cheese_physeq), "data.frame")
richness_table <- estimate_richness(cheese_physeq)
richness_table["Adjunct culture"] <- metadata["AdjunctCulture"]
richness_table["Production day"] <- metadata["production_day"]
# Reorder columns
richness_table <- richness_table[, c(10, 11, 1, 2, 3, 4, 5, 6, 7, 8 , 9)]
richness_table
##          Adjunct culture Production day Observed Chao1  se.chao1      ACE
## K301_DNA         Control              1       25    25 0.0000000 25.00000
## K303_DNA          eRWC.y              1       23    23 0.4890096      NaN
## K304_DNA        eRWC.H.y              1       27    27 0.4906534 27.54519
## K305_DNA       eRWC.HS.y              1       25    25 0.4898979 25.62109
## K306_DNA          eRWC.o              1       24    24 0.0000000 24.00000
## K307_DNA        eRWC.H.o              1       28    28 0.4909903 28.34823
## K308_DNA       eRWC.HS.o              1       27    27 0.2453267 28.05837
## K309_DNA         Control              2       18    18 0.0000000 18.00000
## K311_DNA          eRWC.y              2       20    20 0.0000000 20.00000
## K312_DNA        eRWC.H.y              2       29    29 0.0000000 29.00000
## K313_DNA       eRWC.HS.y              2       24    24 0.0000000 24.00000
## K314_DNA          eRWC.o              2       27    27 0.0000000 27.00000
## K315_DNA        eRWC.H.o              2       27    27 0.0000000 27.00000
## K316_DNA       eRWC.HS.o              2       24    24 0.0000000 24.00000
##            se.ACE  Shannon   Simpson InvSimpson   Fisher
## K301_DNA 2.000000 1.523419 0.6485980   2.845743 2.522969
## K303_DNA      NaN 1.294502 0.6385605   2.766715 2.136409
## K304_DNA 1.408130 1.461295 0.7130358   3.484755 2.555412
## K305_DNA 1.436585 1.467699 0.7218074   3.594632 2.307830
## K306_DNA 1.620185 1.366634 0.6789691   3.114965 2.203134
## K307_DNA 1.939789 1.491580 0.6977966   3.309029 2.723312
## K308_DNA 1.953896 1.440837 0.6901837   3.227719 2.566512
## K309_DNA 1.581139 1.071967 0.4649089   1.868841 1.687209
## K311_DNA 1.788854 1.195255 0.5412021   2.179609 1.881448
## K312_DNA 1.856953 1.689331 0.7426990   3.886499 2.712114
## K313_DNA 1.620185 1.547118 0.7358553   3.785803 2.204451
## K314_DNA 1.632993 1.322028 0.6130468   2.584292 2.561010
## K315_DNA 1.360828 1.732635 0.7466947   3.947805 2.638525
## K316_DNA 1.825742 1.352408 0.6385323   2.766499 2.293975
# write to excel
write.xlsx(richness_table, '../tables/cheese_richness_table.xlsx', colNames = TRUE, rowNames = TRUE, asTable = FALSE, keepNA = FALSE, na.string = "NaN")

# Beta diversity Permanova
adonis2(distance(cheese_physeq, method="bray") ~ AdjunctCulture, data = metadata, permutations = 9999)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = distance(cheese_physeq, method = "bray") ~ AdjunctCulture, data = metadata, permutations = 9999)
##                Df SumOfSqs      R2      F Pr(>F)  
## AdjunctCulture  6  0.67595 0.63871 2.0625 0.0365 *
## Residual        7  0.38236 0.36129                
## Total          13  1.05830 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Diversity plot

## Warning: Removed 1 rows containing missing values (`geom_point()`).

Relative abundances

# percentage thresholds used for filtering
abund_thresh <- 1.0
sp_res_th <- 0.6

# Taxa agglomeration----
# Differently to Ord. & Div. plots, in bar charts a taxa filter is necessary to show the most abundant species
# Let's agglomerate at the Species level using the tax_glom function from phyloseq
# It requires "Kingdom" to be the first column of our tax_table
# (see probl. solved https://github.com/joey711/phyloseq/issues/1551)
tax_table(cheese_physeqR) <- tax_table(cheese_physeqR)[,c(2:10,1)] # Re-order the columns


# Let's agglomerate at the Species level
# i.e. different ASV belonging to the same species will be agglomerated
# their relative abundance will be summed
# further the data frame is melted to long format for filtering and plotting
ch_physeqR_tax <- cheese_physeqR %>%
  tax_glom(taxrank = "Species") %>%
  psmelt()

# Select only the natural whey and the enriched raw milk-whey culture samples 
ch_physeqR_tax_samples <- ch_physeqR_tax

# create a mapper to replace old lactobacilli names in table
fp = file.path(getwd(), "..", "data", "species_dict.json")
species_dict <- fromJSON(file = fp, simplify = TRUE)

# In order to get groups in the plots add a group category and rename replicates # Cultures only
ch_physeqR_tax_samples <- ch_physeqR_tax_samples %>%
  mutate(SubExperiment = paste("Day", substr(SubExperiment, nchar(SubExperiment), nchar(SubExperiment))))

# For plotting it looks nicer when the species names are separated by spaces
ch_physeqR_tax_samples <- ch_physeqR_tax_samples %>%
    mutate(Species = str_replace(Species, "_", " ")) %>%
    # update new lactobacilli names
    mutate(Species = recode_factor(Species, !!!species_dict)) %>%
    # change Species to sp. for unknown species names
    mutate(Species = str_replace(Species, "Species", "sp."))

# group data frame by Species, calculate mean rel. abundance
ch_means <- ddply(ch_physeqR_tax_samples, ~Species, function(x) c(mean = mean(x$Abundance)))

# get the species names with high and low average relative abundance, respectively
# Species whose rel. abundance is higher than 1% (or threshold [abund_thresh] defined above)
ch_high_ab_sp <- ch_means[ch_means$mean >= abund_thresh, "Species"]
# Species whose rel. abundance is less than 1%
ch_low_ab_sp <- ch_means[ch_means$mean < abund_thresh, "Species"]

# Create two data frames with high and low avg. abundances
ch_physeqR_tax_high <- ch_physeqR_tax_samples[ch_physeqR_tax_samples$Species %in% ch_high_ab_sp,]
ch_physeqR_tax_low <- ch_physeqR_tax_samples[ch_physeqR_tax_samples$Species %in% ch_low_ab_sp,]
low_abund <- ch_physeqR_tax_low

lowabund_str <- paste(c("<", abund_thresh, "% avg. abundance"), collapse = " ")

low_abund$Species <- lowabund_str

ch_physeqR_tax_high_full <- rbind(ch_physeqR_tax_high, low_abund)

# convert Species to a character vector from a factor because R
ch_physeqR_tax_low$Species <- as.character(ch_physeqR_tax_low$Species)

# group data frame by Species, calculate max rel. abundance
ch_maxs <- ddply(ch_physeqR_tax_low, ~Species, function(x) c(max = max(x$Abundance)))

# find Species whose max. abundance is less than 0.6 % [threshold "sp_res_th" defined above]
ch_remainder <- ch_maxs[ch_maxs$max <= sp_res_th,]$Species

# change their name to "Other species"
ch_physeqR_tax_low[ch_physeqR_tax_low$Species %in% ch_remainder,]$Species <- 'Other species'
# create nicely formatted guides with the species names
ordered_species_list <- ordered_species_guides(ch_physeqR_tax_low$Species, "Other species")
ordered_species <- ordered_species_list[[1]]
ordered_sp_labels <- ordered_species_list[[2]]

ha_ordered_species_list <- ordered_species_guides(ch_physeqR_tax_high$Species, lowabund_str)
ha_ordered_species <- ha_ordered_species_list[[1]]
ha_ordered_sp_labels <- ha_ordered_species_list[[2]]

# Colors are set manually
species_colors <- c("#88CCEE", "#CC6677", "#DDCC77", "#117733", "#332288", "#AA4499", 
                    "#44AA99", "#999933", "#882255", "#661100", "#6699CC", "#888888")

# Define font size for both plots here
yaxis_tick_label_size = 10
yaxis_label_size = 10
xaxis_tick_label_size = 12
Xaxis_rot = 45

# sample_ID levels
sampleid_vector <- c("Control.3.1", "eRWC.y.3.1", "eRWC.H.y.3.1",
                     "eRWC.HS.y.3.1", "eRWC.o.3.1", "eRWC.H.o.3.1",
                     "eRWC.HS.o.3.1", "Control.3.2", "eRWC.y.3.2",
                     "eRWC.H.y.3.2", "eRWC.HS.y.3.2", "eRWC.o.3.2",
                     "eRWC.H.o.3.2", "eRWC.HS.o.3.2")
sample_id_labels = as.vector(sapply(sampleid_vector, FUN = function(X) substr(X, 1, nchar(X) - 4)))

# Define font size for both plots here
yaxis_tick_label_size = 10
yaxis_label_size = 10
xaxis_tick_label_size = 12
Xaxis_rot = 0
df_sorter = c("Species", "sample_subtype")
x_labels = c(".1",".2",".3")

# High abundant species plot
p3 <- draw_ha_stacked_plot(df = ch_physeqR_tax_high_full,
                           sampleid_vector = sampleid_vector, 
                           x_factor = "AdjunctCulture_ID",
                           x_labels = sample_id_labels,
                           species_colors = species_colors,
                           df_sorter = df_sorter,
                           wrap_group = c("~SubExperiment"),
                           Xaxis_rot = 45,
                           ordered_sp_labels = ha_ordered_sp_labels,
                           abund_thresh = abund_thresh,
                           sp_res_th = sp_res_th,
                           ordered_species = ha_ordered_species
                           )

# Low abundant species plot
p4 <- draw_la_stacked_plot(df = ch_physeqR_tax_low,
                           sampleid_vector = sampleid_vector,
                           x_labels = sample_id_labels,
                           species_colors = species_colors,
                           ordered_sp_labels = ordered_sp_labels,
                           x_factor = "AdjunctCulture_ID",
                           ordered_species = ordered_species,
                           wrap_group = c("~SubExperiment"),
                           Xaxis_rot = 45,
                           abund_thresh = abund_thresh,
                           sp_res_th = sp_res_th
                           )

Microbial counts and chemical analysis of vat milk/cheese

# define paths explicitly
subset_fp <- file.path(getwd(), "..", "data", "cheese", "pivot")

meta_0d_fp <- file.path(subset_fp, "meta_microbes_0d_data.csv")
micro_0d_fp <- file.path(subset_fp, "piv_microbes_0d_data.csv")

meta_1d_fp <- file.path(subset_fp, "meta_microbes_1d_data.csv")
micro_1d_fp <- file.path(subset_fp, "piv_microbes_1d_data.csv")

meta_chem_1d_fp <- file.path(subset_fp, "meta_chem_1d_data.csv")
chem_1d_fp <- file.path(subset_fp, "piv_chem_1d_data.csv")

meta_60d_fp <- file.path(subset_fp, "meta_microbes_60d_data.csv")
micro_60d_fp <- file.path(subset_fp, "piv_microbes_60d_data.csv")

meta_120d_fp <- file.path(subset_fp, "meta_microbes_120d_data.csv")
micro_120d_fp <- file.path(subset_fp, "piv_microbes_120d_data.csv")

meta_chem_120d_fp <- file.path(subset_fp, "meta_chem_120d_data.csv")
chem_120d_fp <- file.path(subset_fp, "piv_chem_120d_data.csv")

Data import and sub-setting

micro_0d <- import_and_select_PCA_data(
              micro_0d_fp, meta_0d_fp, 
              select_samples = c("MilkTreatment", "raw")
              )


micro_1d <- import_and_select_PCA_data(
              micro_1d_fp, meta_1d_fp, 
              select_samples = c("MilkTreatment", "raw"),
              # remove the entire 304 sample due to missing measurements
              rm_samples = c("EH_304_cheese_1d")
              )


chem_1d <- import_and_select_PCA_data(
            chem_1d_fp, meta_chem_1d_fp, 
            select_samples = c("MilkTreatment", "raw"),
            rm_samples = c("EH_304_cheese_1d"),
            ## remove some highly correlated features
            redundant_f = c("L.Lactic.acid.proportion.of.TLA", "Lactic.acid..total.")
            )

# make sure we don't have the meta data twice
meta_len <- length(names(read.csv(meta_chem_1d_fp))) + 1
chem_1d_data <- chem_1d[,c(1, meta_len:length(chem_1d))]

# merge chemical and microbes data
dataset_1d <- merge(micro_1d, chem_1d_data, by = "sampleID")


micro_60d <- import_and_select_PCA_data(
              micro_60d_fp, meta_60d_fp,
              select_samples = c("MilkTreatment", "raw")
              )


micro_120d <- import_and_select_PCA_data(
                micro_120d_fp, meta_120d_fp,
                select_samples = c("MilkTreatment", "raw")
                )

# remove some highly correlated features & features with NAs (Force & Fracture)
hcorr_fc <- c("L.Lactic.acid.proportion.of.TLA",
              "Lactic.acid..total.",
              "Total.volatile.carboxylic.acids",
              "Biogenic.amine",
              "Chloride",
              "Force.at.33",
              "Force.at.fracture",
              "Fracture.deformation")

chem_120d <- import_and_select_PCA_data(chem_120d_fp, meta_chem_120d_fp, 
                                        select_samples = c("MilkTreatment", "raw"),
                                        redundant_f = hcorr_fc
                                        )

# make sure we don't have the meta data twice
meta_len <- length(names(read.csv(meta_chem_120d_fp))) + 1
chem_120d_data <- chem_120d[,c(1, meta_len:length(chem_120d))]

# merge chemical and microbes data
dataset_120d <- merge(micro_120d, chem_120d_data, by = "sampleID")

Supplementary Plots

sessionInfo()
## R version 4.2.2 Patched (2022-11-10 r83330)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.5 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] missMDA_1.18        phyloseq_1.42.0     rjson_0.2.21       
##  [4] magrittr_2.0.3      ggtext_0.1.2        vegan_2.6-4        
##  [7] lattice_0.20-45     permute_0.9-7       BiocManager_1.30.19
## [10] patchwork_1.1.2     FSA_0.9.4           agricolae_1.3-5    
## [13] FactoMineR_2.7      factoextra_1.0.7    reshape2_1.4.4     
## [16] outliers_0.15       lubridate_1.9.2     forcats_1.0.0      
## [19] stringr_1.5.0       dplyr_1.1.0         purrr_1.0.1        
## [22] readr_2.1.4         tidyr_1.3.0         tibble_3.2.0       
## [25] ggplot2_3.4.1       tidyverse_2.0.0     gridExtra_2.3      
## [28] plyr_1.8.8          openxlsx_4.2.5.2   
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.4.1        systemfonts_1.0.4      igraph_1.4.0          
##   [4] splines_4.2.2          AlgDesign_1.2.1        GenomeInfoDb_1.34.9   
##   [7] digest_0.6.31          foreach_1.5.2          htmltools_0.5.4       
##  [10] fansi_1.0.4            doParallel_1.0.17      cluster_2.1.4         
##  [13] tzdb_0.3.0             Biostrings_2.66.0      timechange_0.2.0      
##  [16] colorspace_2.1-0       ggrepel_0.9.3          textshaping_0.3.6     
##  [19] haven_2.5.2            xfun_0.37              crayon_1.5.2          
##  [22] RCurl_1.98-1.10        jsonlite_1.8.4         survival_3.5-3        
##  [25] iterators_1.0.14       ape_5.7                glue_1.6.2            
##  [28] gtable_0.3.1           zlibbioc_1.44.0        emmeans_1.8.4-1       
##  [31] XVector_0.38.0         questionr_0.7.8        car_3.1-1             
##  [34] Rhdf5lib_1.20.0        dunn.test_1.3.5        BiocGenerics_0.44.0   
##  [37] abind_1.4-5            scales_1.2.1           mvtnorm_1.1-3         
##  [40] DBI_1.1.3              rstatix_0.7.2          miniUI_0.1.1.1        
##  [43] Rcpp_1.0.10            xtable_1.8-4           gridtext_0.1.5        
##  [46] flashClust_1.01-2      stats4_4.2.2           DT_0.27               
##  [49] htmlwidgets_1.6.1      ellipsis_0.3.2         mice_3.15.0           
##  [52] pkgconfig_2.0.3        farver_2.1.1           multcompView_0.1-8    
##  [55] sass_0.4.5             utf8_1.2.3             tidyselect_1.2.0      
##  [58] labeling_0.4.2         rlang_1.0.6            later_1.3.0           
##  [61] munsell_0.5.0          tools_4.2.2            cachem_1.0.7          
##  [64] cli_3.6.0              generics_0.1.3         ade4_1.7-22           
##  [67] broom_1.0.3            evaluate_0.20          biomformat_1.26.0     
##  [70] fastmap_1.1.1          yaml_2.3.7             ragg_1.2.5            
##  [73] knitr_1.42             zip_2.2.2              nlme_3.1-162          
##  [76] mime_0.12              leaps_3.1              xml2_1.3.3            
##  [79] compiler_4.2.2         rstudioapi_0.14        ggsignif_0.6.4        
##  [82] klaR_1.7-1             bslib_0.4.2            stringi_1.7.12        
##  [85] highr_0.10             Matrix_1.5-1           commonmark_1.8.1      
##  [88] markdown_1.5           multtest_2.54.0        vctrs_0.5.2           
##  [91] pillar_1.8.1           lifecycle_1.0.3        rhdf5filters_1.10.0   
##  [94] combinat_0.0-8         jquerylib_0.1.4        estimability_1.4.1    
##  [97] data.table_1.14.8      bitops_1.0-7           httpuv_1.6.9          
## [100] R6_2.5.1               promises_1.2.0.1       IRanges_2.32.0        
## [103] codetools_0.2-19       MASS_7.3-58.2          rhdf5_2.42.0          
## [106] withr_2.5.0            S4Vectors_0.36.1       GenomeInfoDbData_1.2.9
## [109] mgcv_1.8-41            parallel_4.2.2         hms_1.1.2             
## [112] grid_4.2.2             labelled_2.10.0        coda_0.19-4           
## [115] rmarkdown_2.20         carData_3.0-5          ggpubr_0.6.0          
## [118] scatterplot3d_0.3-42   Biobase_2.58.0         shiny_1.7.4